- ;History:697,24,17
- include memory.def
- data segment byte public
- NULL equ 0 ;terminates the source pattern in inpat.
- b_struc struc ;gives .b, a byte sized field at offset 0.
- b db ?
- b_struc ends
- w_struc struc ;gives .w, a word sized field at offset 0 (used as es:[si].w).
- w dw ?
- w_struc ends
- CHR equ "C" ;outpat tag: literal char, the char follows the tag.
- BOL equ "<" ;beginning of line, both in the source pattern and as outpat tag.
- EOL equ ">" ;end of line, both in the source pattern and as outpat tag.
- ANY equ "?" ;any char but a newline, both in the source pattern and as outpat tag.
- CCL equ "[" ;opens a character class in the source; outpat tag for a class (count, then members).
- ECCL equ "]" ;closes a character class in the source pattern.
- NCCL equ "~" ;negation in the source pattern; outpat tag for a negated class.
- EOS equ "." ;outpat tag: end of the compiled pattern.
- CLOSURE equ "*" ;zero or more of the previous element; as outpat tag it precedes that element.
- CLOSIZE equ 1 ;number of outpat bytes taken by the CLOSURE tag.
- extrn outpat: byte ;the compiled pattern.
- extrn OUTPATSIZE: abs
- extrn inpat: byte ;the source pattern; for a fast search, the plain string to look for.
- extrn INPATSIZE: abs
- inpatlen dw ? ;length of the string in inpat (set for fast searches).
- direction dw ? ;0=search forward, 1=search backward (set by search).
- slow_search db ? ;=1 if we need to process magic chars.
- extrn textseg: word ;segment of the text; search loads it into es and ds.
- data ends
- bufseg segment public
- extrn topbot: word ;scanning forward, a pointer that reaches topbot continues at bottop.
- extrn bottop: word ;scanning backward, a pointer at bottop continues below topbot.
- bufseg ends
- code segment byte public
- assume cs:code, ds:data
- public slowly, forward, backward
- extrn get_mark: near, set_mark_si: near ;both are called with a mark number in al.
- public search
- search: ;look for the current pattern between two marks, using the slow or fast search as slow_search says.
- ;enter with ch=start mark, cl=end mark, dh=first mark, dl=last mark.
- ;start searching at mark ch. If the string is found, then return the
- ; beginning in mark dh, and the end in mark dl, and cy=0. If the string
- ; wasn't found, return cy=1.
- push dx ;save the first, last marks.
- push es ;save the caller's es.
- mov es,textseg ;es -> text for the search routines.
- assume es:bufseg
- push ds ;save ds
- push es
- pop ds ;ds = es = text segment.
- assume ds:bufseg ;for get_mark
- mov al,cl ;get the end mark.
- push cx ;keep the mark numbers across the call.
- call get_mark ;the result is taken from si.
- mov di,si ;save a copy of the end.
- pop cx
- mov al,ch ;get the start mark.
- call get_mark ;si = start.
- pop ds ;restore ds
- assume ds:data
- mov direction,0 ;assume a forward search.
- cmp si,di ;start>=end?
- jb search_4 ;no. (doesn't matter if they're equal)
- mov direction,1 ;yes, go in reverse direction.
- search_4:
- cmp slow_search,0 ;must we do a slow search?
- jne search_2 ;yes.
- cmp direction,0 ;search in proper direction.
- jne search_5 ;backward.
- call forward
- jmp search_3
- search_5:
- call backward
- jmp search_3
- search_2:
- call slowly ;slowly looks at direction itself.
- search_3: ;here cy=no match, else si->start of match, bx->after end of match.
- pop es ;restore the caller's es (pop leaves cy alone).
- assume es:data ;NOTE(review): presumes the caller had es=data - confirm.
- pop dx ;get the first, last marks back.
- jc search_1 ;not found.
- push ds
- mov ds,textseg ;for set_mark_si
- assume ds:bufseg
- push bx ;preserve pointer to end.
- mov al,dh
- call set_mark_si ;set the first mark.
- pop si ;pushed as bx.
- mov al,dl ;NOTE(review): relies on set_mark_si preserving dx - confirm.
- call set_mark_si ;set the last mark.
- pop ds
- assume ds:data
- clc ;return a match.
- ret
- search_1:
- stc ;return no match.
- ret
- public regexp_pat
- regexp_pat:
- ;compile the regular expression at si, length cx.
- ;exit with cy=1 if the pattern is too long for inpat or makepat rejects it.
- ;on success slow_search says which search to use; for the fast search
- ; inpat and inpatlen describe the plain string to look for.
- ;the string moves store through es:di, so es must address data as well.
- cmp cx,INPATSIZE ;room for the pattern and its null?
- jb regexp_copy ;yes.
- stc ;no - return cy.
- ret
- regexp_copy:
- mov di,offset inpat
- rep movsb ;copy the source pattern.
- mov al,NULL
- stosb ;and terminate it.
- call makepat ;compile inpat into outpat.
- jc regexp_done ;compile error - return makepat's cy.
- ;outpat made of nothing but CHRs is a plain string, so the fast search will
- ; do. Rebuild inpat from outpat, because inpat may still hold '\' escapes.
- mov si,offset outpat
- mov di,offset inpat
- mov bp,di ;remember where the string starts.
- regexp_scan:
- lodsb ;get the next tag.
- cmp al,EOS
- je regexp_fast ;end of pattern - every element was a CHR.
- cmp al,CHR
- jne regexp_slow ;a magic element - need the slow search.
- movsb ;copy the literal char to inpat.
- jmp regexp_scan
- regexp_slow:
- mov slow_search,1
- clc
- regexp_done:
- ret
- regexp_fast:
- sub di,bp ;di = number of chars copied.
- mov inpatlen,di
- mov slow_search,0
- clc
- ret
- public literal_pattern
- literal_pattern:
- ;set up a search for the exact string at si, length cx (no magic chars).
- ;enter with si, cx->pattern.
- ;exit with cy=1 if error (the string doesn't fit in inpat or outpat); then
- ; inpat, inpatlen, outpat and slow_search are left as they were.
- ;the string is copied to inpat for the fast search and is also encoded in
- ; outpat as CHR,char pairs followed by EOS.
- ;the string moves store through es:di, so es must address data as well.
- cmp cx,INPATSIZE ;too big?
- jae literal_3 ;yes - return cy.
- ;check the room in outpat before anything is changed. The limit has to be
- ; measured from the end of outpat (OUTPATSIZE), not from its beginning.
- mov ax,cx
- add ax,ax ;two bytes of outpat for each char...
- jc literal_3 ;(so big it can't possibly fit.)
- inc ax ;...and one for the EOS.
- cmp ax,OUTPATSIZE ;do we have enough room?
- ja literal_3 ;no - quit now.
- push si
- push cx
- mov inpatlen,cx
- mov di,offset inpat
- rep movsb ;inpat gets the plain string.
- pop cx
- pop si
- mov di,offset outpat
- jcxz literal_1 ;empty string - just the EOS.
- literal_2:
- mov al,CHR
- stosb ;the tag,
- movsb ;and the char itself.
- loop literal_2
- literal_1:
- mov al,EOS ;store the end of string.
- stosb
- mov slow_search,0 ;we always use a fast search.
- clc
- ret
- literal_3:
- stc
- ret
- assume ds:data, es:bufseg
- slowly: ;try amatch with outpat at one position after another, stepping as direction says.
- ;es:si -> first char to look at.
- ;es:di -> where to stop; this position itself is not tried. (it is after the last char when going forward.)
- ;return cy if no match, else nc, si->start of match, bx->after end of match.
- slowly_0:
- cmp si,topbot ;at topbot already?
- jne slowly_1 ;no.
- mov si,bottop ;yes - continue at bottop.
- slowly_1:
- cmp si,di ;at the end yet?
- stc ;assume not found. (stc leaves the zero flag from the cmp alone.)
- je slowly_3 ;yes - not found.
- push di ;amatch wants the pattern in di.
- mov di,offset outpat ;start at beginning of pattern.
- call amatch ;si is preserved, bx->after end if it matched.
- pop di ;restore current, end.
- jnc slowly_3 ;we found a match
- cmp direction,0 ;forwards or backwards?
- jne slowly_2 ;backwards.
- inc si ;forwards - go to the next char.
- cmp si,topbot ;at bottom of top?
- jne slowly_5 ;no - go see if we stepped into a newline.
- mov si,bottop ;yes - load top of bottom.
- jmp slowly_1
- slowly_5:
- cmp es:[si-1].w,LINENEW ;at newline?
- jne slowly_1 ;no.
- inc si ;yes - skip LF part of newline.
- jmp slowly_0 ;that may have brought us to topbot.
- slowly_2:
- cmp si,bottop ;at top of bottom?
- jne slowly_4 ;no.
- mov si,topbot ;yes - load bottom of top.
- slowly_4:
- dec si ;back up to previous character.
- cmp si,bottop ;at top of bottom now?
- je slowly_1 ;yes - can't possibly be split over newline.
- cmp es:[si-1].w,LINENEW ;at newline?
- jne slowly_1 ;no.
- dec si ;yes - skip to beginning of newline.
- jmp slowly_1
- slowly_3:
- ret
- forward: ;fast search for the plain string in inpat (inpatlen chars), toward higher addresses.
- ;es:si -> first char to look at.
- ;es:di -> after last char to look at.
- ;return cy if no match, else nc, si->start of match, bx->after end of match.
- cmp si,topbot ;is start at the bottom of the top?
- jne forward_8 ;no.
- mov si,bottop ;yes - start at the top of the bottom instead.
- forward_8:
- cmp di,bottop ;is finish at the top of the bottom?
- jne forward_9 ;no.
- mov di,topbot ;yes - finish at the bottom of the top instead.
- forward_9:
- cmp si,bottop ;is start in bottom?
- jae forward_1 ;yes - no need to split.
- cmp di,topbot ;is finish in top?
- jbe forward_1 ;yes - no need to split.
- push di ;the range spans topbot..bottop - save the real finish.
- mov di,topbot
- call forward ;recursively search top
- mov ax,di ;remember where that search stopped (mov and pop leave cy alone).
- pop di
- jnc forward_2 ;we found it - exit.
- mov si,ax ;start where forward left off.
- push di
- mov di,bottop ;and end where it will begin again.
- call slowly ;slowly goes from topbot to bottop, so it sees a string split between the two parts.
- pop di
- jnc forward_2 ;they found it - exit.
- mov si,bottop ;no need to save the old si.
- call forward ;search the bottom part.
- jmp short forward_2 ;in any case, exit.
- forward_1:
- mov cx,di ;count the number of chars to look at.
- sub cx,si
- mov di,si ;prepare for scasb.
- mov bx,inpatlen
- dec bx ;bx = string length-1.
- sub cx,bx ;this many fewer chars to look at.
- jb forward_5 ;string is shorter than search.
- forward_3: ;here cx = starting positions left to try, es:di -> the next one.
- jcxz forward_5 ;no chars to look at.
- mov si,offset inpat
- lodsb ;get the first char.
- forward_4:
- scasb ;look for the first char.
- loopnz forward_4 ;keep looking until we find it.
- jnz forward_5 ;we didn't
- xchg cx,bx ;set the count to the string length.
- push cx ;save the string length
- push di ;save the source position
- repe cmpsb ;is this it? (with a count of zero the flags still say equal, from the scasb.)
- mov cx,bx ;restore the search length
- pop di ;restore the source position
- pop bx ;restore the string length
- jne forward_3 ;no match - try at next position.
- cmp inpat,LF ;are we searching for an LF first string?
- jne forward_6 ;no - don't worry.
- cmp byte ptr es:[di-2],CR ;did we just match the LF part of a newline? (di is one past the first char.)
- je forward_3 ;yes - no match.
- forward_6:
- cmp inpat[bx],CR ;are we searching for a CR last string?
- jne forward_7 ;no - don't worry.
- cmp byte ptr es:[di+bx],LF ;did we just match the CR part of a newline? (di+bx is just after the match.)
- je forward_3 ;yes - no match.
- forward_7:
- mov si,di ;get the source position
- add bx,si ;add it to the count to get the end.
- dec si ;make it point to the first char again.
- clc
- jmp short forward_2
- forward_5:
- stc ;no match.
- forward_2:
- ret
- backward:
- ;fast search for the plain string in inpat (inpatlen chars), toward lower
- ; addresses.
- ;es:si -> after last char to look at (the search begins just below here).
- ;es:di -> first char to look at (the lowest place a match may start).
- ;return cy if no match, else nc, si->start of match, bx->after end of match.
- ;a range that spans topbot..bottop is searched in three steps: the bottom
- ; part, then the places where a match would be split between the two parts,
- ; then the top part.
- cmp si,bottop ;is start at the top of the bottom?
- jne backward_8 ;no.
- mov si,topbot ;yes - start at the bottom of the top instead.
- backward_8:
- ;the finish has to be moved the other way, to bottop. Moving it to topbot
- ; made the search of the bottom part below span the gap again, and so it
- ; called itself forever.
- cmp di,topbot ;is finish at the bottom of the top?
- jne backward_9 ;no.
- mov di,bottop ;yes - finish at the top of the bottom instead.
- backward_9:
- cmp di,bottop ;is finish in bottom?
- jae backward_1 ;yes - no need to split.
- cmp si,topbot ;is start in top?
- jbe backward_1 ;yes - no need to split.
- push di ;save the real finish.
- mov di,bottop
- call backward ;search the bottom part (finish in bottom, so no further split).
- pop di
- jc backward_10 ;not there - keep looking.
- ret ;found it (pop left cy=0 alone).
- backward_10:
- ;a string split between the parts starts within inpatlen-1 chars of topbot.
- ; amatch steps from topbot to bottop, so let it try those places. outpat
- ; holds the same string as inpat whenever the fast search is in use.
- ;NOTE(review): as with the slowly call in forward, the end of such a match
- ; is not compared against the original start - confirm that is acceptable.
- mov cx,inpatlen
- mov si,topbot
- backward_11:
- cmp cx,1 ;any split positions left?
- jbe backward_12 ;no - search the top part.
- dec cx
- cmp si,di ;run out of text?
- je backward_12 ;yes - the top part is too short, its search will say so.
- dec si ;back up to the previous char.
- cmp es:[si-1].w,LINENEW ;in the middle of a newline?
- je backward_11 ;yes - a match can't start here.
- push di
- mov di,offset outpat ;start at beginning of pattern.
- call amatch
- pop di
- jc backward_11 ;no match - try the previous position.
- ret ;matched: si->start, bx->after end, cy=0.
- backward_12:
- mov si,topbot ;no need to save the old si.
- call backward ;search the top part.
- ret
- backward_1:
- mov cx,si ;count the number of chars to look at.
- sub cx,di
- mov di,si ;prepare for scasb.
- mov bx,inpatlen
- dec bx ;bx = string length-1.
- sub cx,bx ;this many fewer chars to look at.
- jb backward_5 ;string is shorter than search.
- sub di,bx ;back up that many chars.
- add di,2-1 ;pre-increment for loop and size.
- backward_3: ;here cx = starting positions left to try.
- sub di,2 ;restore next char.
- jcxz backward_5 ;no chars to look at.
- mov si,offset inpat
- lodsb ;get the first char.
- std ;now scan backwards.
- backward_4:
- scasb ;look for the first char.
- loopnz backward_4 ;keep looking until we find it.
- cld ;now compare, etc. forwards.
- jnz backward_5 ;we didn't find it.
- add di,2 ;go forwards to the next char.
- xor al,al ;in case string length-1=0.
- xchg cx,bx ;set the count to the string length.
- push cx ;save the string length
- push di ;save the source position
- repe cmpsb ;is this it?
- mov cx,bx ;restore the search length
- pop di ;restore the source position
- pop bx ;restore the string length
- jne backward_3 ;no match - try at next position.
- cmp inpat,LF ;are we searching for an LF first string?
- jne backward_6 ;no - don't worry.
- cmp byte ptr es:[di-2],CR ;did we just match the LF part of a newline?
- je backward_3 ;yes - no match.
- backward_6:
- cmp inpat[bx],CR ;are we searching for a CR last string?
- jne backward_7 ;no - don't worry.
- cmp byte ptr es:[di+bx],LF ;did we just match the CR part of a newline?
- je backward_3 ;yes - no match.
- backward_7:
- mov si,di ;get the source position
- add bx,si ;add it to the count to get the end.
- dec si ;make it point to the first char again.
- clc
- jmp short backward_2
- backward_5:
- stc
- backward_2:
- ret
- amatch: ;match the text at es:si against the pattern from ds:di up to its EOS; calls itself to back off closures.
- ;es:si -> source text
- ;ds:di -> pattern
- ;return cy if no match, else nc, bx->end of matching string. si and di are returned unchanged.
- push si ;preserve input pointers.
- push di
- amatch_1:
- mov al,[di] ;get the tag of the next element.
- cmp al,EOS
- mov bx,si ;prepare to exit.
- je amatch_success ;end of pattern - all of it matched.
- cmp al,CLOSURE
- jne amatch_3 ;an ordinary element.
- add di,CLOSIZE ;di -> the element that is repeated.
- mov bx,si ;save the first closure pattern.
- ;match as many as we can
- amatch_4:
- call omatch ;si moves past each char that matches.
- jnc amatch_4
- ;match only as many as fit the next pattern
- call patsiz
- add di,ax ;di -> rest of pattern, after the repeated element.
- amatch_5:
- push bx ;the recursive call changes bx.
- call amatch ;try to match rest of pattern.
- pop ax ;conserve stack
- jnc amatch_success ;go if it matched.
- mov bx,ax ;restore bx.
- cmp si,bottop ;backing up past the point?
- jne amatch_8 ;no - just decrement.
- mov si,topbot ;yes - get the bottom of the top.
- amatch_8:
- dec si ;point to the previous character.
- cmp si,bx ;zero or more matches still?
- jae amatch_5 ;yes.
- stc
- jmp short amatch_exit ;can't get rest of pattern to match.
- amatch_3:
- call omatch
- jc amatch_exit ;unsuccessful - exit.
- amatch_7:
- call patsiz
- add di,ax ;skip the element we just matched.
- jmp amatch_1
- amatch_success:
- clc
- amatch_exit:
- pop di ;restore input pointers.
- pop si
- ret
- omatch: ;try the single pattern element at ds:di on the text at es:si.
- ;omatch matches at most one character, and only if it returns true at
- ; omatch_yes. When we get to omatch_yes, we see if we are at the point.
- ;es:si -> source text
- ;ds:di -> pattern. return nc with si moved past any char that matched, or cy with si unchanged.
- mov al,[di] ;get the tag.
- cmp al,CHR
- jne omatch_1
- mov al,es:[si] ;CHR - the text char must be the char after the tag.
- cmp al,[di+1]
- jne omatch_no
- inc si
- jmp omatch_yes
- omatch_1:
- cmp al,BOL
- jne omatch_2
- cmp si,bottop ;are we at the point?
- jne omatch_1_1 ;no.
- push si ;yes - have to look at the top.
- mov si,topbot
- cmp es:[si-2].w,LINENEW ;does the top end with a newline?
- pop si ;(pop leaves the flags alone.)
- jne omatch_no
- jmp omatch_yes
- omatch_1_1:
- cmp es:[si-2].w,LINENEW ;is there a newline just before us?
- jne omatch_no
- jmp omatch_yes
- omatch_2:
- cmp al,EOL
- jne omatch_3
- cmp es:[si].w,LINENEW ;EOL - we must be at a newline; si stays where it is.
- jne omatch_no
- jmp omatch_yes
- omatch_3:
- cmp al,ANY
- jne omatch_4
- cmp es:[si].w,LINENEW ;ANY - any char will do, but not a newline.
- je omatch_no
- inc si
- jmp omatch_yes
- omatch_4:
- cmp al,CCL
- jne omatch_5
- cmp es:[si].w,LINENEW ;a class never matches a newline.
- je omatch_no
- call locate ;is the char in the class?
- jc omatch_no ;no.
- inc si
- jmp omatch_yes
- omatch_5:
- cmp al,NCCL
- jne omatch_6
- cmp es:[si].w,LINENEW ;neither does a negated class.
- je omatch_no
- call locate ;is the char in the class?
- jnc omatch_no ;yes - so it doesn't match.
- inc si
- jmp omatch_yes
- omatch_6:
- ;error
- jmp omatch_no ;a tag we don't know matches nothing.
- omatch_no:
- stc
- ret
- omatch_yes:
- cmp si,topbot ;at bottom of top?
- jne omatch_yes_1
- mov si,bottop ;yes, go to top of bottom.
- omatch_yes_1:
- clc
- ret
- locate:
- ;is the text char at es:si one of the members of the class at ds:di?
- ;es:si -> search string
- ;ds:di -> CCL (the tag, a count byte, then that many member chars)
- ;exit with cy=0 if found, cy=1 if not. al = the text char; cx, di and es
- ; are preserved.
- push cx
- mov cl,[di+1] ;get the count.
- mov ch,0
- mov al,es:[si]
- push es ;save es, di
- push di
- push ds ;outpat is in ds
- pop es
- add di,2 ;di now -> characters.
- ;with a count of zero scasb never runs, and the flags would be those of the
- ; add above. An empty class has no members, so say so outright.
- jcxz locate_1
- repne scasb
- jne locate_1 ;ran out of members.
- pop di ;restore es,di
- pop es
- pop cx
- clc
- ret
- locate_1:
- pop di ;restore es,di
- pop es
- pop cx
- stc
- ret
- patsiz:
- ;how many bytes of outpat does the pattern element at ds:di take?
- ;enter ds:di -> pat
- ;exit ax = size. For a tag we don't know ax is not a size; only al has been
- ; loaded, with the tag.
- mov al,[di]
- cmp al,CHR
- je patsiz_chr
- cmp al,CLOSURE
- je patsiz_closure
- cmp al,BOL
- je patsiz_single
- cmp al,EOL
- je patsiz_single
- cmp al,ANY
- je patsiz_single
- cmp al,CCL
- je patsiz_class
- cmp al,NCCL
- je patsiz_class
- ;error
- ret
- patsiz_chr:
- mov ax,2 ;the tag and the char.
- ret
- patsiz_closure:
- mov ax,CLOSIZE
- ret
- patsiz_single:
- mov ax,1 ;just the tag.
- ret
- patsiz_class:
- mov al,[di+1] ;the number of members,
- mov ah,0
- add ax,2 ;plus the tag and the count byte.
- ret
- assume ds:data, es:data ;(ds used to be named twice here; nothing below refers to bufseg.)
- public eol_only
- eol_only:
- ;return zr if the search pattern matches eol only.
- ; we need this routine because search and replace should advance past the
- ; newline if we're matching eol only.
- ;the compiled pattern is then just EOL followed by EOS; read as one word the
- ; EOS is the high byte.
- cmp word ptr outpat,EOS*256 + EOL
- ret
- makepat:
- ;compile the null terminated source pattern in inpat into outpat.
- ;while compiling:
- ; si = index of the current char in inpat
- ; di = index of the next free byte of outpat, dx = the limit for di
- ; bx = index in outpat of the previous element, -1 if there isn't one
- ;return cy=1 if error (outpat filled up, or a character class was bad).
- ;dx is preserved; ax, bx, si and di are not.
- push dx
- ;addbyte and the full test below compare di with dx, so dx has to be loaded
- ; here; the callers only pass si and cx.
- mov dx,OUTPATSIZE-1 ;dx -> last dest entry.
- mov si,0
- mov di,0
- mov bx,-1
- makepat_1:
- cmp inpat[si],NULL
- je makepat_0 ;end of the source pattern.
- push di ;remember where this element starts; makepat_2 pops it into bx.
- mov al,inpat[si]
- cmp al,'\' ;are we escaping something?
- jne makepat_a
- cmp inpat[si+1],NULL ;is the '\' at the end?
- je makepat_9 ;yes - just use \.
- inc si
- mov al,inpat[si] ;get the escaped char.
- jmp makepat_9 ;go stick it in.
- makepat_a:
- cmp al,ANY
- jne makepat_3
- call addset
- jmp makepat_2
- ;this really belongs at the end of makepat, but the short jump can't get there.
- makepat_0:
- mov al,EOS
- call addset
- cmp di,dx ;did outpat fill up?
- pop dx ;restore the caller's dx (pop leaves the flags alone).
- jne makepat__0_1
- stc ;yes - the pattern is too long.
- ret
- makepat__0_1:
- clc
- ret
- makepat_3:
- cmp al,BOL
- jne makepat_7
- cmp si,0 ;BOL is only special at the beginning.
- jne makepat_6
- call addset
- jmp makepat_2
- makepat_6:
- call addchar
- jmp makepat_2
- makepat_7:
- cmp al,EOL
- jne makepat_8
- cmp inpat[si+1],NULL ;EOL is only special at the end.
- jne makepat_9
- call addset
- jmp makepat_2
- makepat_9:
- call addchar
- jmp makepat_2
- makepat_8:
- cmp al,CCL
- jne makepat_10
- call getccl
- jnc makepat_2
- pop di ;throw away the element start.
- pop dx ;restore the caller's dx.
- stc
- ret
- makepat_10:
- cmp al,CLOSURE
- jne makepat_11
- cmp bx,0 ;is bx>0?
- jnge makepat_12 ;no - not closure.
- mov al,outpat[bx]
- cmp al,CLOSURE ;trying to close a closure?
- je makepat_12 ;yes - not closure.
- cmp al,BOL ;trying to close a beginning of line?
- je makepat_12 ;yes - not closure.
- call stclos ;put a CLOSURE in front of the previous element.
- add sp,2 ;throw away the old previous.
- push bx ;the closure is now the previous element.
- jmp makepat_2
- makepat_11:
- cmp al,NCCL
- jne makepat_13
- cmp inpat[si+1],NULL ;not special at the end.
- je makepat_13
- mov al,NCCL
- call addset
- mov al,1 ;one character follows.
- call addbyte
- mov al,inpat[si+1]
- call addbyte
- inc si ;skip the NCCL.
- jmp makepat_2
- makepat_13:
- makepat_12:
- call addchar
- jmp makepat_2
- makepat_2:
- pop bx ;bx = start of the element just added.
- inc si
- jmp makepat_1
- addchar:
- ;add a literal char element to outpat: the CHR tag, then the char.
- ;al = CHR to put.
- push ax ;keep the char while the tag goes in.
- mov al,CHR
- call addset
- pop ax
- jmp addbyte ;store the char, and return from there.
- addset: ;only command chars call addset.
- addbyte:
- ;store al at outpat[di] and step di, unless di has reached dx.
- ;al = char to put, di->dest, dx->end of dest.
- cmp di,dx
- jne addbyte_1 ;still room.
- ret ;full - drop the byte, di stays at dx.
- addbyte_1:
- mov outpat[di],al
- inc di
- ret
- stclos:
- ;make room for a CLOSURE tag in front of the previous element, and store it.
- ;di-> last set added + 1
- ;bx-> last closure added (the start of the element to repeat)
- ;dx-> end of dest
- ;exit with di stepped over the new tag. If outpat is already full nothing
- ; is done; di stays at dx, which makepat reports as an error.
- cmp di,dx ;any room to move things up?
- je stclos_3 ;no - moving would store past the end, and di would get by dx.
- push di
- stclos_1:
- dec di
- mov al,outpat[di] ;move the element up by CLOSIZE, last byte first.
- mov outpat[di+CLOSIZE],al
- cmp di,bx
- jne stclos_1
- stclos_2:
- mov outpat[bx],CLOSURE
- pop di
- add di,CLOSIZE
- stclos_3:
- ret
- getccl:
- ;compile a character class. inpat[si] is the CCL that opens it.
- ;si -> source (null terminated)
- ;di -> dest, dx -> end of dest
- ;stores CCL (NCCL if the class starts with NCCL), a count byte, and the
- ; members.
- ;return cy=1 if error (the class isn't closed by ECCL, or it has more
- ; members than the count byte can hold). With cy=0, si -> the ECCL.
- inc si
- mov al,inpat[si]
- cmp al,NCCL
- jne getccl_1
- call addset ;a negated class.
- inc si
- jmp getccl_2
- getccl_1:
- mov al,CCL
- call addset
- getccl_2:
- push bx
- mov bx,di ;bx -> the count byte.
- call addbyte ;leave room for count
- call dodash ;store the members.
- mov ax,di
- sub ax,bx ;the count byte plus the members stored.
- je getccl_5 ;not even the count byte went in - outpat is full, so there is no byte to fill in.
- dec ax ;just the members.
- or ah,ah ;does the count fit in a byte?
- jne getccl_4 ;no - the class is too big.
- mov outpat[bx],al
- getccl_5:
- pop bx
- cmp inpat[si],ECCL ;did the class end properly?
- je getccl_3
- stc
- ret
- getccl_3:
- clc
- ret
- getccl_4:
- pop bx
- stc
- ret
- dodash:
- ;copy the members of a character class to outpat, filling in ranges.
- ;si -> source pattern (null terminated)
- ;di -> destination pattern
- ;dx -> end of destination pattern
- ;exit with si -> the ECCL or NULL that ended the class.
- ;a "-" is an ordinary member at the beginning or end of the class, or unless
- ; it is between two alphanumerics with the first no greater than the second.
- push bx
- mov bx,si ;remember where the class begins.
- dodash_1:
- mov al,inpat[si]
- or al,al
- je dodash_2 ;end of the source pattern.
- cmp al,ECCL
- je dodash_2 ;end of the class.
- cmp al,"-"
- je dodash_4
- call addbyte ;an ordinary member.
- jmp dodash_8
- dodash_4:
- cmp si,bx ;at beginning?
- je dodash_5
- ;look at what follows the dash; inpat[si] is the dash itself, so testing it
- ; could never find the end.
- mov al,inpat[si+1]
- or al,al ;or at end?
- je dodash_5
- cmp al,ECCL
- jne dodash_6
- dodash_5:
- mov al,"-" ;if at beginning or at end, just a "-"
- call addbyte
- jmp dodash_8
- dodash_6:
- mov al,inpat[si-1]
- cmp al,inpat[si+1] ;is the range backwards?
- ja dodash_5 ;yes - just a "-"
- call alphanumeric
- jnc dodash_5 ;first isn't alphanumeric - just a "-"
- mov al,inpat[si+1]
- call alphanumeric
- jnc dodash_5 ;last isn't alphanumeric - just a "-"
- mov al,inpat[si-1]
- dodash_7:
- inc al ;pre-increment -- the first one's there.
- cmp al,inpat[si+1]
- ja dodash_9 ;past the last one.
- call addbyte
- jmp dodash_7
- dodash_9:
- inc si ;skip the dash; dodash_8 skips the last char of the range.
- dodash_8:
- inc si
- jmp dodash_1
- dodash_2:
- pop bx
- ret
- alphanumeric:
- ;is al a digit or a letter?
- ;return cy=1 if al is alphanumeric, cy=0 if it isn't. al is unchanged.
- cmp al,"0"
- jb alnum_no ;below the digits.
- cmp al,"9"
- jbe alnum_yes ;a digit.
- cmp al,"A"
- jb alnum_no ;between the digits and the capitals.
- cmp al,"Z"
- jbe alnum_yes ;a capital.
- cmp al,"a"
- jb alnum_no ;between the capitals and the small letters.
- cmp al,"z"
- ja alnum_no ;above the small letters.
- alnum_yes:
- stc
- ret
- alnum_no:
- clc
- ret
- code ends
- end